import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import joypy
import seaborn as sns
sns.set()
import os
import matplotlib.cm as cm
from matplotlib.cm import ScalarMappable
from matplotlib.lines import Line2D
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from textwrap import wrap
world_champions_file = 'worldChampions.csv'
world_champions = pd.read_csv(world_champions_file)
world_champions = world_champions.drop(columns=['Unnamed: 0'])
ordered_world_champions = world_champions.sort_values(by='WorldTitles')
category_codes = pd.Categorical(ordered_world_champions['WorldTitles']).codes
colormap = ["#154500", "#7db208", "#bcdd7a", "#acdffe", "#4e9bf8", "#0b2b81"]
colors = ['grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'gold', 'grey', 'grey', 'grey', 'darkorange',
'mediumblue', 'limegreen', 'red', 'turquoise']
title_max = np.max(ordered_world_champions['WorldTitles'])
title_min = np.min(ordered_world_champions['WorldTitles'])
def scale_to_interval(x, low=10, high=70):
return ((x - title_min) / (title_max - title_min)) * (high - low) + low
category_codes = pd.Categorical(ordered_world_champions['WorldTitles']).codes
colormap = ["#154500", "#7db208", "#bcdd7a", "#acdffe", "#4e9bf8", "#0b2b81"]
colors = ['grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'orchid', 'grey', 'grey', 'grey', 'darkorange',
'mediumblue', 'limegreen', 'red', 'turquoise']
angles = np.linspace(0, 2 * np.pi, len(ordered_world_champions['WorldTitles']), endpoint=False)
heights = np.array(ordered_world_champions['WorldTitles'])
plus = 15
fig, ax = plt.subplots(figsize=(25, 25), subplot_kw={"projection": "polar"})
fig.patch.set_facecolor("white")
ax.set_facecolor("white")
ax.set_theta_offset(np.pi / 2)
ax.set_theta_direction(-1)
ax.vlines(angles, 0 + plus, heights + plus, color=colors, lw=2)
ax.scatter(angles, heights + plus, color=colors)
ax.spines["start"].set_color("none")
ax.spines["polar"].set_color("none")
ax.grid(False)
ax.set_xticks([])
ax.set_yticklabels([])
grey1 = "#d6d6d6"
grey2 = "#cccccc"
grey3 = "#c1c1c1"
grey4 = "#b7b7b7"
grey5 = "#adadad"
grey6 = "#a3a3a3"
grey7 = "#999999"
hangles = np.linspace(0, 2 * np.pi, 200)
ax.plot(hangles, np.repeat(plus, 200), color=grey1, lw=0.7)
ax.plot(hangles, np.repeat(1 + plus, 200), color=grey1, lw=0.7)
ax.plot(hangles, np.repeat(2 + plus, 200), color=grey2, lw=0.7)
ax.plot(hangles, np.repeat(3 + plus, 200), color=grey3, lw=0.7)
ax.plot(hangles, np.repeat(4 + plus, 200), color=grey4, lw=0.7)
ax.plot(hangles, np.repeat(5 + plus, 200), color=grey5, lw=0.7)
ax.plot(hangles, np.repeat(6 + plus, 200), color=grey6, lw=0.7)
ax.plot(hangles, np.repeat(7 + plus, 200), color=grey7, lw=0.7)
for angle, height, driver, titles in zip(angles, heights, ordered_world_champions['Driver'], ordered_world_champions['WorldTitles']):
ax.text(angle, height + plus, f"{driver} ({titles})", ha='left', va='center',
fontsize=12, fontstyle='italic', fontfamily='serif', color='black')
import os
directory = 'images'
if not os.path.exists(directory):
os.makedirs(directory)
fig.savefig(f'{directory}/WorldChampions.svg', format='svg', dpi=1200, facecolor='w', bbox_inches='tight', pad_inches=0)
/Users/sainishanth/anaconda3/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). from pandas.core import (
world_champions_file = 'worldChampions.csv'
world_champions = pd.read_csv(world_champions_file)
print(world_champions)
Unnamed: 0 Driver WorldTitles 0 0 Nino Farina 1 1 1 Juan Manuel Fangio 5 2 2 Alberto Ascari 2 3 3 Mike Hawthorn 1 4 4 Jack Brabham 3 5 5 Phil Hill 1 6 6 Graham Hill 2 7 7 Jim Clark 2 8 8 John Surtees 1 9 9 Denny Hulme 1 10 10 Jackie Stewart 3 11 11 Jochen Rindt 1 12 12 Emerson Fittipaldi 2 13 13 Niki Lauda 3 14 14 James Hunt 1 15 15 Mario Andretti 1 16 16 Jody Scheckter 1 17 17 Alan Jones 1 18 18 Nelson Piquet 3 19 19 Keke Rosberg 1 20 20 Alain Prost 4 21 21 Ayrton Senna 3 22 22 Nigel Mansell 1 23 23 Michael Schumacher 7 24 24 Damon Hill 1 25 25 Jacques Villeneuve 1 26 26 Mika Häkkinen 2 27 27 Fernando Alonso 2 28 28 Kimi Räikkönen 1 29 29 Lewis Hamilton 7 30 30 Jenson Button 1 31 31 Sebastian Vettel 4 32 32 Nico Rosberg 1 33 33 Max Verstappen 1
import pandas as pd
import altair as alt
# Assuming you've already loaded the data into 'world_champions' dataframe
# Define the heatmap chart
heatmap = alt.Chart(world_champions).mark_rect().encode(
x=alt.X('Driver:N', title='Driver', sort=None), # Sort by original order
y=alt.Y('WorldTitles:O', title='World Titles', sort=None), # Sort by original order
color=alt.Color('WorldTitles:Q', title='World Titles', scale=alt.Scale(scheme='viridis'))
).properties(
width=600,
height=300,
title='World Championships by Driver'
)
# Add text labels
text = heatmap.mark_text(baseline='middle').encode(
text='WorldTitles:Q',
color=alt.condition(
alt.datum.WorldTitles > 3,
alt.value('white'),
alt.value('black')
)
)
# Combine the heatmap and text
chart = (heatmap + text).configure_axis(
labelFontSize=12,
titleFontSize=14
)
# Show the chart
chart
import pandas as pd
from plotnine import *
import numpy as np
results = ('results.csv')
results = pd.read_csv(results)
races = ('races.csv')
races = pd.read_csv(races)
drivers = ('drivers.csv')
drivers = pd.read_csv(drivers)
current_point_system = ('currPointSystem.csv')
current_point_system = pd.read_csv(current_point_system)
data_driver_1 = results[results['driverId'] == 1]
data_driver_30 = results[results['driverId'] == 30]
data_driver_579 = results[results['driverId'] == 579]
data_driver_117 = results[results['driverId'] == 117]
data_driver_20 = results[results['driverId'] == 20]
data_driver_101 = results[results['driverId'] == 101]
data_driver_328 = results[results['driverId'] == 328]
data_driver_182 = results[results['driverId'] == 182]
data_driver_37 = results[results['driverId'] == 37]
data_driver_102 = results[results['driverId'] == 102]
data_driver_647 = results[results['driverId'] == 647]
data_driver_373 = results[results['driverId'] == 373]
data_driver_289 = results[results['driverId'] == 289]
data_driver_224 = results[results['driverId'] == 224]
data_driver_57 = results[results['driverId'] == 57]
data_driver_4 = results[results['driverId'] == 4]
data_driver_642 = results[results['driverId'] == 642]
data_driver_578 = results[results['driverId'] == 578]
data_driver_403 = results[results['driverId'] == 403]
data_driver_341 = results[results['driverId'] == 341]
data_driver_304 = results[results['driverId'] == 304]
data_driver_358 = results[results['driverId'] == 358]
data_driver_231 = results[results['driverId'] == 231]
data_driver_207 = results[results['driverId'] == 207]
data_driver_222 = results[results['driverId'] == 222]
data_driver_178 = results[results['driverId'] == 178]
data_driver_177 = results[results['driverId'] == 177]
data_driver_3 = results[results['driverId'] == 3]
data_driver_71 = results[results['driverId'] == 71]
data_driver_95 = results[results['driverId'] == 95]
data_driver_35 = results[results['driverId'] == 35]
data_driver_8 = results[results['driverId'] == 8]
data_driver_18 = results[results['driverId'] == 18]
data_driver_830 = results[results['driverId'] == 830]
frames_driver = [data_driver_1, data_driver_30, data_driver_579, data_driver_117,
data_driver_20, data_driver_101, data_driver_328, data_driver_182,
data_driver_37, data_driver_102, data_driver_647, data_driver_373,
data_driver_289, data_driver_224, data_driver_57, data_driver_4,
data_driver_642, data_driver_578, data_driver_403, data_driver_341,
data_driver_304, data_driver_358, data_driver_231, data_driver_207,
data_driver_222, data_driver_178, data_driver_177, data_driver_3,
data_driver_71, data_driver_95, data_driver_35, data_driver_8,
data_driver_18, data_driver_830]
result_driver = pd.concat(frames_driver)
idPil_race = result_driver.join(races.set_index('raceId'), on='raceId', lsuffix='caller', rsuffix='other')
idPil_race_driverName = idPil_race.join(drivers.set_index('driverId'), on='driverId', lsuffix='caller', rsuffix='other')
idPil_race_driverName['forename'] = idPil_race_driverName['forename'] + " " + idPil_race_driverName['surname']
idPil_race_driverName = idPil_race_driverName.drop(columns=['surname'], axis=1)
idPil_race_driverName['dob'] = pd.to_datetime(idPil_race_driverName['dob'], format='%Y-%m-%d')
idPil_race_driverName['date'] = pd.to_datetime(idPil_race_driverName['date'], format='%Y-%m-%d')
idPil_race_driverName['driver_age'] = ((idPil_race_driverName['date'] - idPil_race_driverName['dob']).dt.days) / 365.25
current_point_system['positionOrder'] = current_point_system['positionOrder'].astype('str')
idPil_race_driverName = idPil_race_driverName.join(current_point_system.set_index('positionOrder'), on='position', lsuffix='caller', rsuffix='other')
idPil_race_driverName = idPil_race_driverName.drop(columns=['raceId', 'constructorId', 'driverId', 'name', 'numbercaller', 'pointscaller', 'grid', 'position', 'positionText', 'positionOrder', 'laps', 'timecaller', 'milliseconds', 'fastestLap', 'rank', 'fastestLapTime', 'fastestLapSpeed', 'statusId', 'year', 'round', 'circuitId', 'date', 'timeother', 'urlcaller', 'driverRef', 'numberother', 'code', 'dob', 'nationality', 'urlother'], axis=1)
idPil_race_driverName = idPil_race_driverName.reset_index(drop=True)
idPil_race_driverName.loc[0:287, ['forename']] = 'ZZZLewis Hamilton'
idPil_race_driverName.loc[288:595, ['forename']] = 'ZZZMichael Schumacher'
idPil_race_driverName.loc[596:653, ['forename']] = 'ZZZJuan Fangio'
idPil_race_driverName.loc[654:855, ['forename']] = 'ZZZAlain Prost'
idPil_race_driverName.loc[856:1135, ['forename']] = 'ZZZSebastian Vettel'
idPil_race_driverName.loc[1547:1708, ['forename']] = 'ZZZAyrton Senna'
idPil_race_driverName = idPil_race_driverName.sort_values(['driver_age'], ascending=True)
idPil_race_driverName['pointsother'] = idPil_race_driverName['pointsother'].replace(np.nan, 0)
idPil_race_driverName['cumulative_points'] = idPil_race_driverName.groupby('forename')['pointsother'].cumsum()
fig1 = (ggplot(idPil_race_driverName, aes(x='driver_age', y='cumulative_points')) +
scale_x_continuous(labels=['20 years', '30 years', '40 years', '50 years']) +
geom_line(aes(group='forename', color='forename', size='forename')) +
scale_color_manual(values=['grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
'grey', 'grey', 'grey', 'grey', 'darkorange', 'orchid',
'limegreen', 'turquoise', 'red', 'mediumblue']) +
scale_size_manual(values=[0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2]) +
theme_minimal() + # or theme_bw() for a plain white background with grid lines
theme(panel_background=element_rect(fill="white", color="white")) +
theme(panel_grid_major=element_blank(), panel_grid_minor=element_blank()) +
theme(legend_position=('none')) +
theme(figure_size=(17, 12))
)
print(fig1)
fig1.save(filename=f'{directory}/ChampionsPoints.png', dpi=1200, width=17, height=12)
/var/folders/09/z4b8vf1j5gnb4sqltdm7hxh80000gn/T/ipykernel_6436/2551521217.py:107: FutureWarning: Using print(plot) to draw and show the plot figure is deprecated and will be removed in a future version. Use plot.show().
/Users/sainishanth/anaconda3/lib/python3.10/site-packages/plotnine/ggplot.py:606: PlotnineWarning: Saving 17 x 12 in image. /Users/sainishanth/anaconda3/lib/python3.10/site-packages/plotnine/ggplot.py:607: PlotnineWarning: Filename: images/ChampionsPoints.png
results = ('results.csv')
results = pd.read_csv(results)
races = ('races.csv')
races = pd.read_csv(races)
drivers = ('drivers.csv')
drivers = pd.read_csv(drivers)
print(results)
print(races)
print(drivers)
resultId raceId driverId constructorId number grid position \
0 1 18 1 1 22 1 1
1 2 18 2 2 3 5 2
2 3 18 3 3 7 7 3
3 4 18 4 4 5 11 4
4 5 18 5 1 23 3 5
... ... ... ... ... ... ... ...
25394 25400 1073 815 9 11 4 15
25395 25401 1073 849 3 6 16 \N
25396 25402 1073 841 51 99 14 \N
25397 25403 1073 847 3 63 17 \N
25398 25404 1073 8 51 7 18 \N
positionText positionOrder points laps time milliseconds \
0 1 1 10.0 58 1:34:50.616 5690616
1 2 2 8.0 58 +5.478 5696094
2 3 3 6.0 58 +8.163 5698779
3 4 4 5.0 58 +17.181 5707797
4 5 5 4.0 58 +18.014 5708630
... ... ... ... ... ... ...
25394 15 15 0.0 55 \N \N
25395 R 16 0.0 50 \N \N
25396 R 17 0.0 33 \N \N
25397 R 18 0.0 26 \N \N
25398 R 19 0.0 25 \N \N
fastestLap rank fastestLapTime fastestLapSpeed statusId
0 39 2 1:27.452 218.300 1
1 41 3 1:27.739 217.586 1
2 41 5 1:28.090 216.719 1
3 58 7 1:28.603 215.464 1
4 43 1 1:27.418 218.385 1
... ... ... ... ... ...
25394 51 2 1:26.419 219.993 5
25395 30 15 1:29.293 212.912 3
25396 33 16 1:29.442 212.557 6
25397 23 19 1:30.647 209.732 6
25398 23 18 1:29.698 211.951 23
[25399 rows x 18 columns]
raceId year round circuitId name date \
0 1 2009 1 1 Australian Grand Prix 2009-03-29
1 2 2009 2 2 Malaysian Grand Prix 2009-04-05
2 3 2009 3 17 Chinese Grand Prix 2009-04-19
3 4 2009 4 3 Bahrain Grand Prix 2009-04-26
4 5 2009 5 4 Spanish Grand Prix 2009-05-10
... ... ... ... ... ... ...
1075 1092 2022 19 22 Japanese Grand Prix 2022-10-09
1076 1093 2022 20 69 United States Grand Prix 2022-10-23
1077 1094 2022 21 32 Mexico City Grand Prix 2022-10-30
1078 1095 2022 22 18 São Paulo Grand Prix 2022-11-13
1079 1096 2022 23 24 Abu Dhabi Grand Prix 2022-11-20
time url
0 06:00:00 http://en.wikipedia.org/wiki/2009_Australian_G...
1 09:00:00 http://en.wikipedia.org/wiki/2009_Malaysian_Gr...
2 07:00:00 http://en.wikipedia.org/wiki/2009_Chinese_Gran...
3 12:00:00 http://en.wikipedia.org/wiki/2009_Bahrain_Gran...
4 12:00:00 http://en.wikipedia.org/wiki/2009_Spanish_Gran...
... ... ...
1075 05:10:00 https://en.wikipedia.org/wiki/2022_Japanese_Gr...
1076 19:00:00 https://en.wikipedia.org/wiki/2022_United_Stat...
1077 19:00:00 https://en.wikipedia.org/wiki/2022_Mexican_Gra...
1078 17:00:00 https://en.wikipedia.org/wiki/2022_S%C3%A3o_Pa...
1079 13:00:00 https://en.wikipedia.org/wiki/2022_Abu_Dhabi_G...
[1080 rows x 8 columns]
driverId driverRef number code forename surname dob \
0 1 hamilton 44 HAM Lewis Hamilton 1985-01-07
1 2 heidfeld \N HEI Nick Heidfeld 1977-05-10
2 3 rosberg 6 ROS Nico Rosberg 1985-06-27
3 4 alonso 14 ALO Fernando Alonso 1981-07-29
4 5 kovalainen \N KOV Heikki Kovalainen 1981-10-19
.. ... ... ... ... ... ... ...
849 851 aitken 89 AIT Jack Aitken 1995-09-23
850 852 tsunoda 22 TSU Yuki Tsunoda 2000-05-11
851 853 mazepin 9 MAZ Nikita Mazepin 1999-03-02
852 854 mick_schumacher 47 MSC Mick Schumacher 1999-03-22
853 855 zhou \N ZHO Guanyu Zhou 1999-05-30
nationality url
0 British http://en.wikipedia.org/wiki/Lewis_Hamilton
1 German http://en.wikipedia.org/wiki/Nick_Heidfeld
2 German http://en.wikipedia.org/wiki/Nico_Rosberg
3 Spanish http://en.wikipedia.org/wiki/Fernando_Alonso
4 Finnish http://en.wikipedia.org/wiki/Heikki_Kovalainen
.. ... ...
849 British http://en.wikipedia.org/wiki/Jack_Aitken
850 Japanese http://en.wikipedia.org/wiki/Yuki_Tsunoda
851 Russian http://en.wikipedia.org/wiki/Nikita_Mazepin
852 German http://en.wikipedia.org/wiki/Mick_Schumacher
853 Chinese https://en.wikipedia.org/wiki/Guanyu_Zhou
[854 rows x 9 columns]
import plotly.graph_objects as go
fig2 = go.Figure(data=[go.Sankey(
node=dict(
pad=15,
thickness=20,
line=dict(color="black", width=2),
label=["Alain Prost", "Ayrton Senna", "Juan Manuel Fangio", "Lewis Hamilton", "Michael Schumacher", "Sebastian Vettel",
"Finished race", "Race not finished", "Lapped",
"Accident", "Mechanical issue", "Wheel problem", "Technical/electronic issue", "Not classified"],
color="white",
# fixed positions
x=[0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.9, 0.9, 0.9, 0.9, 0.9],
y=[0, 0.2, 0.4, 0.6, 0.8, 1, 0.3, 0.9, 0.7, 0.2, 0.4, 0.6, 0.8, 1]
),
link=dict(
source=[0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
target=[6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13],
value=[119, 85, 37, 251, 219, 213, 63, 62, 14, 26, 68, 42, 18, 12, 7, 9, 18, 24, 20, 27, 0, 13, 3, 22, 22, 3, 14, 1, 1, 11, 0, 2, 0, 13, 8, 2, 2, 1, 33, 22, 5, 6, 2, 17, 15, 4, 5, 1],
color=['darkorange', 'orchid', 'limegreen', 'turquoise', 'red', 'mediumblue', 'darkorange', 'orchid', 'limegreen', 'turquoise', 'red', 'mediumblue', 'darkorange', 'orchid', 'limegreen', 'turquoise', 'red', 'mediumblue',
'darkorange', 'darkorange', 'darkorange', 'darkorange', 'darkorange', 'orchid', 'orchid', 'orchid', 'orchid', 'orchid',
'limegreen', 'limegreen', 'limegreen', 'limegreen', 'limegreen', 'turquoise', 'turquoise', 'turquoise', 'turquoise', 'turquoise',
'red', 'red', 'red', 'red', 'red', 'mediumblue', 'mediumblue', 'mediumblue', 'mediumblue', 'mediumblue']
),
)],
layout=dict(
title=dict(
text='',
font=dict(size=17, family='Georgia'),
x=0,
y=0.98
),
)
)
fig2
fig3 = go.Figure(data=[go.Sankey(
node = dict(
pad = 15,
thickness = 20,
line = dict(color = "black", width = 2),
label = ["Lewis Hamilton", "Sebastian Vettel",
"Race finished", "Race not finished", "Lapped", "Race finished", "Race not finished", "Lapped",
"Accident", "Mechanical issue", "Wheel issue", "Technical/electronic issue", "Not classified",
"Accident", "Mechanical issue", "Wheel issue", "Technical/electronic issue", "Not classified"],
color = ["turquoise", "red", "turquoise", "turquoise", "turquoise", "red", "red", "red", "turquoise", "turquoise", "turquoise", "turquoise", "turquoise", "red", "red", "red", "red", "red"],
# fixed positions
x = [0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9],
y = [0.2, 0.6, 0.1, 0.5, 0.35, 0.6, 0.9, 0.75, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
),
link = dict(
source = [0, 1, 0, 1, 0, 1, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6], # indices correspond to labels
target = [2, 5, 3, 6, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
value = [251, 219, 26, 68, 9, 18, 13, 8, 2, 2, 1, 33, 22, 5, 6, 2],
#color = ['turquoise', 'red', 'turquoise', 'purple', 'turquoise', 'red',
# 'turquoise', 'turquoise', 'turquoise', 'turquoise', 'turquoise', 'red', 'red', 'red', 'red', 'red']
),)],
layout = dict(
title = dict(
text = '',
font = dict(size = 17, family = 'Georgia'),
x=0,
y=0.98
),
)
)
fig3
circuits = pd.read_csv('circuits.csv')
print(circuits)
circuitId circuitRef name \
0 1 albert_park Albert Park Grand Prix Circuit
1 2 sepang Sepang International Circuit
2 3 bahrain Bahrain International Circuit
3 4 catalunya Circuit de Barcelona-Catalunya
4 5 istanbul Istanbul Park
.. ... ... ...
74 75 portimao Autódromo Internacional do Algarve
75 76 mugello Autodromo Internazionale del Mugello
76 77 jeddah Jeddah Street Circuit
77 78 losail Losail International Circuit
78 79 miami Miami International Autodrome
location country lat lng alt \
0 Melbourne Australia -37.84970 144.96800 10
1 Kuala Lumpur Malaysia 2.76083 101.73800 18
2 Sakhir Bahrain 26.03250 50.51060 7
3 Montmeló Spain 41.57000 2.26111 109
4 Istanbul Turkey 40.95170 29.40500 130
.. ... ... ... ... ...
74 Portimão Portugal 37.22700 -8.62670 108
75 Mugello Italy 43.99750 11.37190 255
76 Jeddah Saudi Arabia 21.54330 39.17280 15
77 Al Daayen Qatar 25.49000 51.45420 \N
78 Miami United States 25.95810 -80.23890 \N
url
0 http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1 http://en.wikipedia.org/wiki/Sepang_Internatio...
2 http://en.wikipedia.org/wiki/Bahrain_Internati...
3 http://en.wikipedia.org/wiki/Circuit_de_Barcel...
4 http://en.wikipedia.org/wiki/Istanbul_Park
.. ...
74 http://en.wikipedia.org/wiki/Algarve_Internati...
75 http://en.wikipedia.org/wiki/Mugello_Circuit
76 http://en.wikipedia.org/wiki/Jeddah_Street_Cir...
77 http://en.wikipedia.org/wiki/Losail_Internatio...
78 https://en.wikipedia.org/wiki/Miami_Internatio...
[79 rows x 9 columns]
import pandas as pd
import altair as alt
import altair_saver
# Load the circuits data
circuits = pd.read_csv('circuits.csv')
# Filter out circuits with missing latitude or longitude values
circuits = circuits[(circuits['lat'] != '\\N') & (circuits['lng'] != '\\N')]
# Convert latitude and longitude columns to float
circuits['lat'] = circuits['lat'].astype(float)
circuits['lng'] = circuits['lng'].astype(float)
# Define the geoplot for circuits
geoplot_circuits = alt.Chart(circuits).mark_circle().encode(
longitude='lng:Q',
latitude='lat:Q',
size=alt.value(100), # Size of the circles
color=alt.Color('country:N', legend=None), # Color by country
tooltip=['name', 'country'] # Display circuit name and country on hover
).properties(
width=800,
height=400,
title='Formula 1 Circuits Around the World'
).project(
type='mercator' # Projection type
)
# Define the world map
world_map = alt.Chart(alt.topo_feature('https://vega.github.io/vega-datasets/data/world-110m.json', 'countries')).mark_geoshape(
fill='lightgray',
stroke='white'
).properties(
width=800,
height=400
)
# Combine the geoplot of circuits and the world map
combined_plot = (world_map + geoplot_circuits)
combined_plot
constructor_results = pd.read_csv('constructor_results.csv')
constructors = pd.read_csv('constructors.csv')
races = pd.read_csv('races.csv')
print(constructor_results)
print(races)
print(constructors)
constructorResultsId raceId constructorId points status
0 1 18 1 14.0 \N
1 2 18 2 8.0 \N
2 3 18 3 9.0 \N
3 4 18 4 5.0 \N
4 5 18 5 2.0 \N
... ... ... ... ... ...
11945 16445 1073 214 6.0 \N
11946 16446 1073 117 0.0 \N
11947 16447 1073 210 0.0 \N
11948 16448 1073 3 0.0 \N
11949 16449 1073 51 0.0 \N
[11950 rows x 5 columns]
raceId year round circuitId name date \
0 1 2009 1 1 Australian Grand Prix 2009-03-29
1 2 2009 2 2 Malaysian Grand Prix 2009-04-05
2 3 2009 3 17 Chinese Grand Prix 2009-04-19
3 4 2009 4 3 Bahrain Grand Prix 2009-04-26
4 5 2009 5 4 Spanish Grand Prix 2009-05-10
... ... ... ... ... ... ...
1075 1092 2022 19 22 Japanese Grand Prix 2022-10-09
1076 1093 2022 20 69 United States Grand Prix 2022-10-23
1077 1094 2022 21 32 Mexico City Grand Prix 2022-10-30
1078 1095 2022 22 18 São Paulo Grand Prix 2022-11-13
1079 1096 2022 23 24 Abu Dhabi Grand Prix 2022-11-20
time url
0 06:00:00 http://en.wikipedia.org/wiki/2009_Australian_G...
1 09:00:00 http://en.wikipedia.org/wiki/2009_Malaysian_Gr...
2 07:00:00 http://en.wikipedia.org/wiki/2009_Chinese_Gran...
3 12:00:00 http://en.wikipedia.org/wiki/2009_Bahrain_Gran...
4 12:00:00 http://en.wikipedia.org/wiki/2009_Spanish_Gran...
... ... ...
1075 05:10:00 https://en.wikipedia.org/wiki/2022_Japanese_Gr...
1076 19:00:00 https://en.wikipedia.org/wiki/2022_United_Stat...
1077 19:00:00 https://en.wikipedia.org/wiki/2022_Mexican_Gra...
1078 17:00:00 https://en.wikipedia.org/wiki/2022_S%C3%A3o_Pa...
1079 13:00:00 https://en.wikipedia.org/wiki/2022_Abu_Dhabi_G...
[1080 rows x 8 columns]
constructorId constructorRef name nationality \
0 1 mclaren McLaren British
1 2 bmw_sauber BMW Sauber German
2 3 williams Williams British
3 4 renault Renault French
4 5 toro_rosso Toro Rosso Italian
.. ... ... ... ...
206 209 manor Manor Marussia British
207 210 haas Haas F1 Team American
208 211 racing_point Racing Point British
209 213 alphatauri AlphaTauri Italian
210 214 alpine Alpine F1 Team French
url
0 http://en.wikipedia.org/wiki/McLaren
1 http://en.wikipedia.org/wiki/BMW_Sauber
2 http://en.wikipedia.org/wiki/Williams_Grand_Pr...
3 http://en.wikipedia.org/wiki/Renault_in_Formul...
4 http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso
.. ...
206 http://en.wikipedia.org/wiki/Manor_Motorsport
207 http://en.wikipedia.org/wiki/Haas_F1_Team
208 http://en.wikipedia.org/wiki/Racing_Point_F1_Team
209 http://en.wikipedia.org/wiki/Scuderia_AlphaTauri
210 http://en.wikipedia.org/wiki/Alpine_F1_Team
[211 rows x 5 columns]
import pandas as pd
import altair as alt
constructor_results = pd.read_csv('constructor_results.csv')
constructors = pd.read_csv('constructors.csv')
# Merge dataframes to get constructor names and points
merged_data = pd.merge(constructor_results, constructors, on='constructorId')
# Sum up the points for each constructor
constructor_points = merged_data.groupby('name')['points'].sum().reset_index()
# Sort the constructors by total points
constructor_points_sorted = constructor_points.sort_values(by='points', ascending=False)
# Create bar chart
bar_chart = alt.Chart(constructor_points_sorted.head(10)).mark_bar().encode(
x=alt.X('points:Q', axis=alt.Axis(title='Total Points')),
y=alt.Y('name:N', sort='-x', axis=alt.Axis(title='Constructor')),
color=alt.Color('name:N', legend=None),
tooltip=['name', 'points']
).properties(
width=800,
height=400,
title='Top 10 Constructors by Total Points'
)
bar_chart
import pandas as pd
import altair as alt
# Assuming you have already loaded the data into DataFrames
# constructor_results = pd.read_csv('constructor_results.csv')
# constructors = pd.read_csv('constructors.csv')
# races = pd.read_csv('races.csv')
# Merge constructor_results with races to get the year information
merged_data = pd.merge(constructor_results, races, on='raceId')
# Merge the merged_data with constructors to get the constructor names
merged_data = pd.merge(merged_data, constructors, on='constructorId')
print(merged_data)
constructorResultsId raceId constructorId points status year \
0 1 18 1 14.0 \N 2008
1 2 18 2 8.0 \N 2008
2 3 18 3 9.0 \N 2008
3 4 18 4 5.0 \N 2008
4 5 18 5 2.0 \N 2008
... ... ... ... ... ... ...
11945 16445 1073 214 6.0 \N 2021
11946 16446 1073 117 0.0 \N 2021
11947 16447 1073 210 0.0 \N 2021
11948 16448 1073 3 0.0 \N 2021
11949 16449 1073 51 0.0 \N 2021
round circuitId name_x date time \
0 1 1 Australian Grand Prix 2008-03-16 04:30:00
1 1 1 Australian Grand Prix 2008-03-16 04:30:00
2 1 1 Australian Grand Prix 2008-03-16 04:30:00
3 1 1 Australian Grand Prix 2008-03-16 04:30:00
4 1 1 Australian Grand Prix 2008-03-16 04:30:00
... ... ... ... ... ...
11945 22 24 Abu Dhabi Grand Prix 2021-12-12 13:00:00
11946 22 24 Abu Dhabi Grand Prix 2021-12-12 13:00:00
11947 22 24 Abu Dhabi Grand Prix 2021-12-12 13:00:00
11948 22 24 Abu Dhabi Grand Prix 2021-12-12 13:00:00
11949 22 24 Abu Dhabi Grand Prix 2021-12-12 13:00:00
url_x constructorRef \
0 http://en.wikipedia.org/wiki/2008_Australian_G... mclaren
1 http://en.wikipedia.org/wiki/2008_Australian_G... bmw_sauber
2 http://en.wikipedia.org/wiki/2008_Australian_G... williams
3 http://en.wikipedia.org/wiki/2008_Australian_G... renault
4 http://en.wikipedia.org/wiki/2008_Australian_G... toro_rosso
... ... ...
11945 http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr... alpine
11946 http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr... aston_martin
11947 http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr... haas
11948 http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr... williams
11949 http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr... alfa
name_y nationality \
0 McLaren British
1 BMW Sauber German
2 Williams British
3 Renault French
4 Toro Rosso Italian
... ... ...
11945 Alpine F1 Team French
11946 Aston Martin British
11947 Haas F1 Team American
11948 Williams British
11949 Alfa Romeo Swiss
url_y
0 http://en.wikipedia.org/wiki/McLaren
1 http://en.wikipedia.org/wiki/BMW_Sauber
2 http://en.wikipedia.org/wiki/Williams_Grand_Pr...
3 http://en.wikipedia.org/wiki/Renault_in_Formul...
4 http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso
... ...
11945 http://en.wikipedia.org/wiki/Alpine_F1_Team
11946 http://en.wikipedia.org/wiki/Aston_Martin_in_F...
11947 http://en.wikipedia.org/wiki/Haas_F1_Team
11948 http://en.wikipedia.org/wiki/Williams_Grand_Pr...
11949 http://en.wikipedia.org/wiki/Alfa_Romeo_in_For...
[11950 rows x 16 columns]
# Group by year and find the maximum points
max_points_per_year = merged_data.groupby('year')['points'].max().reset_index()
# Find the corresponding constructor for each maximum points entry
winning_constructors = merged_data.loc[merged_data.groupby('year')['points'].idxmax(), ['year', 'name_y']].reset_index(drop=True)
# Merge the maximum points and winning constructors data
max_points_per_year = pd.merge(max_points_per_year, winning_constructors, on='year')
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line().encode(
x='year:O',
y='points:Q',
color=alt.value('blue'), # Fixed color for the line
tooltip=['year', 'points', 'name_y'] # Show year, points, and name of the winning constructor on hover
).properties(
width=600,
height=400,
title="Maximum Points Scored by Any Constructor Each Year"
)
# Filter out the data where Ferrari has won
ferrari_wins = max_points_per_year[max_points_per_year['name_y'] == 'Ferrari']
# Annotate Ferrari wins
point_annotations = alt.Chart(ferrari_wins).mark_text(
align='left',
baseline='middle',
dx=5, # Adjust text position
dy=-10, # Adjust text position
fontSize=10 # Adjust text size
).encode(
x='year:O',
y='points:Q',
text=alt.value('Ferrari') # Annotate with 'Ferrari'
)
line_chart + point_annotations
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line(color='red').encode(
x='year:O',
y='points:Q',
tooltip=['year', 'points', 'name_y'] # Show year, points, and name of the winning constructor on hover
).properties(
width=600,
height=400,
title="Maximum Points Scored by Any Constructor Each Year"
)
# Filter out the data where Ferrari has won
ferrari_wins = max_points_per_year[max_points_per_year['name_y'] == 'Ferrari']
# Annotate Ferrari wins with circle markers in red
point_annotations = alt.Chart(ferrari_wins).mark_circle(color='red', size=100).encode(
x='year:O',
y='points:Q',
)
line_chart + point_annotations
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line(color='turquoise').encode(
x='year:O',
y='points:Q',
tooltip=['year', 'points', 'name_y'] # Show year, points, and name of the winning constructor on hover
).properties(
width=600,
height=400,
title="Maximum Points Scored by Any Constructor Each Year"
)
# Filter out the data where Mercedes has won
mercedes_wins = max_points_per_year[max_points_per_year['name_y'] == 'Mercedes']
# Annotate Mercedes wins with circle markers in turquoise
point_annotations = alt.Chart(mercedes_wins).mark_circle(color='turquoise', size=100).encode(
x='year:O',
y='points:Q',
)
line_chart + point_annotations
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line(color='navy').encode(
x='year:O',
y='points:Q',
tooltip=['year', 'points', 'name_y'] # Show year, points, and name of the winning constructor on hover
).properties(
width=600,
height=400,
title="Maximum Points Scored by Any Constructor Each Year"
)
# Filter out the data where Red Bull has won
redbull_wins = max_points_per_year[max_points_per_year['name_y'] == 'Red Bull']
# Annotate Red Bull wins with circle markers in navy blue
point_annotations = alt.Chart(redbull_wins).mark_circle(color='navy', size=100).encode(
x='year:O',
y='points:Q',
)
line_chart + point_annotations
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line(color='navy').encode(
x='year:O',
y='points:Q',
tooltip=['year', 'points', 'name_y'] # Show year, points, and name of the winning constructor on hover
).properties(
width=600,
height=400,
title="Maximum Points Scored by Any Constructor Each Year"
)
# Filter out the data where Mercedes, Red Bull, and Ferrari have won
mercedes_wins = max_points_per_year[max_points_per_year['name_y'] == 'Mercedes']
redbull_wins = max_points_per_year[max_points_per_year['name_y'] == 'Red Bull']
ferrari_wins = max_points_per_year[max_points_per_year['name_y'] == 'Ferrari']
# Annotate Mercedes wins with circle markers
mercedes_markers = alt.Chart(mercedes_wins).mark_circle(color='turquoise', size=100).encode(
x='year:O',
y='points:Q',
)
# Annotate Red Bull wins with circle markers
redbull_markers = alt.Chart(redbull_wins).mark_circle(color='navy', size=100).encode(
x='year:O',
y='points:Q',
)
# Annotate Ferrari wins with circle markers
ferrari_markers = alt.Chart(ferrari_wins).mark_circle(color='red', size=100).encode(
x='year:O',
y='points:Q',
)
line_chart + mercedes_markers + redbull_markers + ferrari_markers
print(drivers)
print(results)
print(races)
driverId driverRef number code forename surname dob \
0 1 hamilton 44 HAM Lewis Hamilton 1985-01-07
1 2 heidfeld \N HEI Nick Heidfeld 1977-05-10
2 3 rosberg 6 ROS Nico Rosberg 1985-06-27
3 4 alonso 14 ALO Fernando Alonso 1981-07-29
4 5 kovalainen \N KOV Heikki Kovalainen 1981-10-19
.. ... ... ... ... ... ... ...
849 851 aitken 89 AIT Jack Aitken 1995-09-23
850 852 tsunoda 22 TSU Yuki Tsunoda 2000-05-11
851 853 mazepin 9 MAZ Nikita Mazepin 1999-03-02
852 854 mick_schumacher 47 MSC Mick Schumacher 1999-03-22
853 855 zhou \N ZHO Guanyu Zhou 1999-05-30
nationality url
0 British http://en.wikipedia.org/wiki/Lewis_Hamilton
1 German http://en.wikipedia.org/wiki/Nick_Heidfeld
2 German http://en.wikipedia.org/wiki/Nico_Rosberg
3 Spanish http://en.wikipedia.org/wiki/Fernando_Alonso
4 Finnish http://en.wikipedia.org/wiki/Heikki_Kovalainen
.. ... ...
849 British http://en.wikipedia.org/wiki/Jack_Aitken
850 Japanese http://en.wikipedia.org/wiki/Yuki_Tsunoda
851 Russian http://en.wikipedia.org/wiki/Nikita_Mazepin
852 German http://en.wikipedia.org/wiki/Mick_Schumacher
853 Chinese https://en.wikipedia.org/wiki/Guanyu_Zhou
[854 rows x 9 columns]
resultId raceId driverId constructorId number grid position \
0 1 18 1 1 22 1 1
1 2 18 2 2 3 5 2
2 3 18 3 3 7 7 3
3 4 18 4 4 5 11 4
4 5 18 5 1 23 3 5
... ... ... ... ... ... ... ...
25394 25400 1073 815 9 11 4 15
25395 25401 1073 849 3 6 16 \N
25396 25402 1073 841 51 99 14 \N
25397 25403 1073 847 3 63 17 \N
25398 25404 1073 8 51 7 18 \N
positionText positionOrder points laps time milliseconds \
0 1 1 10.0 58 1:34:50.616 5690616
1 2 2 8.0 58 +5.478 5696094
2 3 3 6.0 58 +8.163 5698779
3 4 4 5.0 58 +17.181 5707797
4 5 5 4.0 58 +18.014 5708630
... ... ... ... ... ... ...
25394 15 15 0.0 55 \N \N
25395 R 16 0.0 50 \N \N
25396 R 17 0.0 33 \N \N
25397 R 18 0.0 26 \N \N
25398 R 19 0.0 25 \N \N
fastestLap rank fastestLapTime fastestLapSpeed statusId
0 39 2 1:27.452 218.300 1
1 41 3 1:27.739 217.586 1
2 41 5 1:28.090 216.719 1
3 58 7 1:28.603 215.464 1
4 43 1 1:27.418 218.385 1
... ... ... ... ... ...
25394 51 2 1:26.419 219.993 5
25395 30 15 1:29.293 212.912 3
25396 33 16 1:29.442 212.557 6
25397 23 19 1:30.647 209.732 6
25398 23 18 1:29.698 211.951 23
[25399 rows x 18 columns]
raceId year round circuitId name date \
0 1 2009 1 1 Australian Grand Prix 2009-03-29
1 2 2009 2 2 Malaysian Grand Prix 2009-04-05
2 3 2009 3 17 Chinese Grand Prix 2009-04-19
3 4 2009 4 3 Bahrain Grand Prix 2009-04-26
4 5 2009 5 4 Spanish Grand Prix 2009-05-10
... ... ... ... ... ... ...
1075 1092 2022 19 22 Japanese Grand Prix 2022-10-09
1076 1093 2022 20 69 United States Grand Prix 2022-10-23
1077 1094 2022 21 32 Mexico City Grand Prix 2022-10-30
1078 1095 2022 22 18 São Paulo Grand Prix 2022-11-13
1079 1096 2022 23 24 Abu Dhabi Grand Prix 2022-11-20
time url
0 06:00:00 http://en.wikipedia.org/wiki/2009_Australian_G...
1 09:00:00 http://en.wikipedia.org/wiki/2009_Malaysian_Gr...
2 07:00:00 http://en.wikipedia.org/wiki/2009_Chinese_Gran...
3 12:00:00 http://en.wikipedia.org/wiki/2009_Bahrain_Gran...
4 12:00:00 http://en.wikipedia.org/wiki/2009_Spanish_Gran...
... ... ...
1075 05:10:00 https://en.wikipedia.org/wiki/2022_Japanese_Gr...
1076 19:00:00 https://en.wikipedia.org/wiki/2022_United_Stat...
1077 19:00:00 https://en.wikipedia.org/wiki/2022_Mexican_Gra...
1078 17:00:00 https://en.wikipedia.org/wiki/2022_S%C3%A3o_Pa...
1079 13:00:00 https://en.wikipedia.org/wiki/2022_Abu_Dhabi_G...
[1080 rows x 8 columns]
# Merge drivers and results on driverId
merged_driver_results = pd.merge(drivers, results, on='driverId')
# Merge the above dataframe with races on raceId
merged_data = pd.merge(merged_driver_results, races, on='raceId')
print(merged_data)
driverId driverRef number_x code forename surname \
0 1 hamilton 44 HAM Lewis Hamilton
1 1 hamilton 44 HAM Lewis Hamilton
2 1 hamilton 44 HAM Lewis Hamilton
3 1 hamilton 44 HAM Lewis Hamilton
4 1 hamilton 44 HAM Lewis Hamilton
... ... ... ... ... ... ...
25394 854 mick_schumacher 47 MSC Mick Schumacher
25395 854 mick_schumacher 47 MSC Mick Schumacher
25396 854 mick_schumacher 47 MSC Mick Schumacher
25397 854 mick_schumacher 47 MSC Mick Schumacher
25398 854 mick_schumacher 47 MSC Mick Schumacher
dob nationality url_x \
0 1985-01-07 British http://en.wikipedia.org/wiki/Lewis_Hamilton
1 1985-01-07 British http://en.wikipedia.org/wiki/Lewis_Hamilton
2 1985-01-07 British http://en.wikipedia.org/wiki/Lewis_Hamilton
3 1985-01-07 British http://en.wikipedia.org/wiki/Lewis_Hamilton
4 1985-01-07 British http://en.wikipedia.org/wiki/Lewis_Hamilton
... ... ... ...
25394 1999-03-22 German http://en.wikipedia.org/wiki/Mick_Schumacher
25395 1999-03-22 German http://en.wikipedia.org/wiki/Mick_Schumacher
25396 1999-03-22 German http://en.wikipedia.org/wiki/Mick_Schumacher
25397 1999-03-22 German http://en.wikipedia.org/wiki/Mick_Schumacher
25398 1999-03-22 German http://en.wikipedia.org/wiki/Mick_Schumacher
resultId ... fastestLapTime fastestLapSpeed statusId year round \
0 1 ... 1:27.452 218.300 1 2008 1
1 27 ... 1:35.462 209.033 1 2008 2
2 57 ... 1:35.520 203.969 11 2008 3
3 69 ... 1:22.017 204.323 1 2008 4
4 90 ... 1:26.529 222.085 1 2008 5
... ... ... ... ... ... ... ...
25394 25324 ... \N \N 4 2021 18
25395 25343 ... 1:13.793 210.215 12 2021 19
25396 25361 ... 1:28.412 219.065 11 2021 20
25397 25385 ... 1:36.043 231.421 3 2021 21
25398 25399 ... 1:29.457 212.522 11 2021 22
circuitId name date time_y \
0 1 Australian Grand Prix 2008-03-16 04:30:00
1 2 Malaysian Grand Prix 2008-03-23 07:00:00
2 3 Bahrain Grand Prix 2008-04-06 11:30:00
3 4 Spanish Grand Prix 2008-04-27 12:00:00
4 5 Turkish Grand Prix 2008-05-11 12:00:00
... ... ... ... ...
25394 32 Mexico City Grand Prix 2021-11-07 19:00:00
25395 18 São Paulo Grand Prix 2021-11-14 17:00:00
25396 78 Qatar Grand Prix 2021-11-21 14:00:00
25397 77 Saudi Arabian Grand Prix 2021-12-05 17:30:00
25398 24 Abu Dhabi Grand Prix 2021-12-12 13:00:00
url_y
0 http://en.wikipedia.org/wiki/2008_Australian_G...
1 http://en.wikipedia.org/wiki/2008_Malaysian_Gr...
2 http://en.wikipedia.org/wiki/2008_Bahrain_Gran...
3 http://en.wikipedia.org/wiki/2008_Spanish_Gran...
4 http://en.wikipedia.org/wiki/2008_Turkish_Gran...
... ...
25394 http://en.wikipedia.org/wiki/2021_Mexican_Gran...
25395 http://en.wikipedia.org/wiki/São_Paulo_Grand_Prix
25396 http://en.wikipedia.org/wiki/2021_Qatar_Grand_...
25397 http://en.wikipedia.org/wiki/2021_Saudi_Arabia...
25398 http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...
[25399 rows x 33 columns]
import altair as alt
# Filter data for Lewis Hamilton and Sebastian Vettel
hamilton_data = merged_data[merged_data['driverRef'] == 'hamilton']
vettel_data = merged_data[merged_data['driverRef'] == 'vettel']
# Aggregate points per year for both drivers
hamilton_points = hamilton_data.groupby('year')['points'].sum().reset_index()
vettel_points = vettel_data.groupby('year')['points'].sum().reset_index()
# Rename columns for clarity
hamilton_points.columns = ['year', 'Lewis Hamilton']
vettel_points.columns = ['year', 'Sebastian Vettel']
# Merge the data for plotting
merged_points = pd.merge(hamilton_points, vettel_points, on='year')
print(merged_points)
year Lewis Hamilton Sebastian Vettel 0 2007 109.0 6.0 1 2008 98.0 35.0 2 2009 49.0 84.0 3 2010 240.0 256.0 4 2011 227.0 392.0 5 2012 190.0 281.0 6 2013 189.0 397.0 7 2014 384.0 167.0 8 2015 381.0 278.0 9 2016 380.0 212.0 10 2017 363.0 317.0 11 2018 408.0 320.0 12 2019 413.0 240.0 13 2020 347.0 33.0 14 2021 387.5 43.0
# Melt the DataFrame for Altair plotting
melted_points = merged_points.melt('year', var_name='driver', value_name='points')
# Create the bar chart
bar_chart = alt.Chart(melted_points).mark_bar().encode(
x='year:O',
y='points:Q',
color='driver:N'
).properties(
width=600,
height=400,
title="Comparison of Total Points: Lewis Hamilton vs Sebastian Vettel"
)
bar_chart
melted_points = merged_points.melt('year', var_name='driver', value_name='points')
# Create the bar chart
bar_chart = alt.Chart(melted_points).mark_bar().encode(
x='year:O',
y='points:Q',
color=alt.Color('driver:N', scale=alt.Scale(domain=['Lewis Hamilton', 'Sebastian Vettel'], range=['turquoise', 'red']))
).properties(
width=600,
height=400,
title="Comparison of Total Points: Lewis Hamilton vs Sebastian Vettel"
)
bar_chart